withr::with_dir(here::here(), {
targets::tar_load(c(
model_data_1ha, model_data_cf
))
})
In GAMs, the number of knots used for a smooth needs to be great enough to capture the “true” complexity of a relationship. However, higher number of knots increase computational demand. Here I’ll do some diagnostics and trial-and-error to select an adequate number of knots. I’m going to use models without random effects for computational efficiency.
s_cf <- bam(
surv ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(5, 18),
xt = list(bs = "cr")),
family = binomial,
data = model_data_cf,
method = "fREML",
select = TRUE
)
set.seed(888)
k.check(s_cf)
## k' edf k-index p-value
## s(log_size_prev) 9 3.467671 0.9622796 0.9675
## s(spei_history,L) 72 13.648312 NA NA
Looks like the default k is adequate for s(log_size_prev). Unfortunately k.check() doesn’t work for data with matrices, so I’ll use an alternative approach detailed in ?mgcv::choose.k. Briefly, it involves looking for pattern in residuals.
source(here::here("R", "utils.R"))
check_res_edf(s_cf)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 1.8
We want this number to be close to 0. First let’s up the lag dimension number of knots.
s_cf2 <- bam(
surv ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(5, 30),
xt = list(bs = "cr")),
family = binomial,
data = model_data_cf,
method = "fREML",
select = TRUE
)
edf(s_cf)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 3.47
## 2 s(spei_history,L) 13.6
edf(s_cf2)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 3.47
## 2 s(spei_history,L) 13.7
draw(s_cf, select = "s(spei_history,L)")
## Warning: Removed 939 rows containing non-finite values (stat_contour).
draw(s_cf2, select = "s(spei_history,L)")
## Warning: Removed 939 rows containing non-finite values (stat_contour).
Didn’t change edf at all really, or shape of smooth, so back to original k for lag and up k for spei
s_cf3 <- bam(
surv ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(15, 18),
xt = list(bs = "cr")),
family = binomial,
data = model_data_cf,
method = "fREML",
select = TRUE
)
edf(s_cf)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 3.47
## 2 s(spei_history,L) 13.6
edf(s_cf3)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 3.47
## 2 s(spei_history,L) 15.5
draw(s_cf, select = "s(spei_history,L)")
## Warning: Removed 939 rows containing non-finite values (stat_contour).
draw(s_cf3, select = "s(spei_history,L)")
## Warning: Removed 939 rows containing non-finite values (stat_contour).
EDF went up and shape of smooth changed, so check if its enough.
check_res_edf(s_cf3)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 1.1
Can I reduce number of knots in the lag dimension?
s_cf4 <- bam(
surv ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(15, 15),
xt = list(bs = "cr")),
family = binomial,
data = model_data_cf,
method = "fREML",
select = TRUE
)
edf(s_cf3)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 3.47
## 2 s(spei_history,L) 15.5
edf(s_cf4)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 3.47
## 2 s(spei_history,L) 15.5
draw(s_cf3, select = "s(spei_history,L)")
## Warning: Removed 939 rows containing non-finite values (stat_contour).
draw(s_cf4, select = "s(spei_history,L)")
## Warning: Removed 939 rows containing non-finite values (stat_contour).
Shape is unchanged
check_res_edf(s_cf4)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 1.1
Looks good. Final knots for s_cf are:
log_size_prev: 10 spei_history, L: 15,15
rm(s_cf, s_cf2, s_cf3, s_cf4)
s_1ha <- bam(
surv ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(5, 18),
xt = list(bs = "cr")),
family = binomial,
data = model_data_1ha,
method = "fREML",
select = TRUE
)
set.seed(888)
k.check(s_1ha)
## k' edf k-index p-value
## s(log_size_prev) 9 2.841006 0.9469409 0.9725
## s(spei_history,L) 72 9.446201 NA NA
Indicates default number of knots for log_spei_prev is adequate.
check_res_edf(s_1ha)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
Knots are too low. Try upping knots for lag dimension
s_1ha2 <- bam(
surv ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(5, 30),
xt = list(bs = "cr")),
family = binomial,
data = model_data_1ha,
method = "fREML",
select = TRUE
)
edf(s_1ha)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 2.84
## 2 s(spei_history,L) 9.45
edf(s_1ha2)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 2.84
## 2 s(spei_history,L) 9.48
draw(s_1ha, select = "s(spei_history,L)")
## Warning: Removed 910 rows containing non-finite values (stat_contour).
draw(s_1ha2, select = "s(spei_history,L)")
## Warning: Removed 910 rows containing non-finite values (stat_contour).
No change, back to original k for lag and increase k for SPEI
s_1ha3 <- bam(
surv ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(10, 18),
xt = list(bs = "cr")),
family = binomial,
data = model_data_1ha,
method = "fREML",
select = TRUE
)
edf(s_1ha)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 2.84
## 2 s(spei_history,L) 9.45
edf(s_1ha3)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 2.84
## 2 s(spei_history,L) 10.9
draw(s_1ha, select = "s(spei_history,L)")
## Warning: Removed 910 rows containing non-finite values (stat_contour).
draw(s_1ha3, select = "s(spei_history,L)")
## Warning: Removed 910 rows containing non-finite values (stat_contour).
Slight change, check if adequate
check_res_edf(s_1ha3)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
Can I go lower?
s_1ha4 <- bam(
surv ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(10, 10),
xt = list(bs = "cr")),
family = binomial,
data = model_data_1ha,
method = "fREML",
select = TRUE
)
edf(s_1ha3)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 2.84
## 2 s(spei_history,L) 10.9
edf(s_1ha4)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 2.84
## 2 s(spei_history,L) 10.8
draw(s_1ha3, select = "s(spei_history,L)")
## Warning: Removed 910 rows containing non-finite values (stat_contour).
draw(s_1ha4, select = "s(spei_history,L)")
## Warning: Removed 910 rows containing non-finite values (stat_contour).
check_res_edf(s_1ha3)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
10 knots all around
rm(s_1ha, s_1ha2, s_1ha3, s_1ha4)
# use only living plants
model_data_1ha_2 <- model_data_1ha %>%
dplyr::filter(surv == 1, !is.na(log_size))
model_data_cf_2 <- model_data_cf %>%
dplyr::filter(surv == 1, !is.na(log_size))
g_cf <- bam(log_size ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(5, 18),
xt = list(bs = "cr")),
family = scat(link = "identity"), #like leptokurtic gaussian.
data = model_data_cf_2,
method = "fREML",
select = TRUE)
set.seed(888)
k.check(g_cf)
## k' edf k-index p-value
## s(log_size_prev) 9 5.702907 0.9765955 0.04
## s(spei_history,L) 72 15.694164 NA NA
log_size_prev needs more knots.
g_cf2 <- bam(log_size ~
flwr_prev +
s(log_size_prev, bs = "cr", k = 25) +
s(spei_history, L,
bs = "cb",
k = c(5, 18),
xt = list(bs = "cr")),
family = scat(link = "identity"), #like leptokurtic gaussian.
data = model_data_cf_2,
method = "fREML",
select = TRUE)
set.seed(888)
k.check(g_cf2)
## k' edf k-index p-value
## s(log_size_prev) 24 9.370061 0.9805118 0.0825
## s(spei_history,L) 72 15.750214 NA NA
Kept increasing until edf stabilized and p > 0.05. 25 knots is adequate.
How about the crossbasis?
check_res_edf(g_cf2)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
Try reducing lag dimension knots to save on computation.
g_cf3 <- bam(log_size ~
flwr_prev +
s(log_size_prev, bs = "cr", k = 25) +
s(spei_history, L,
bs = "cb",
k = c(5, 15),
xt = list(bs = "cr")),
family = scat(link = "identity"), #like leptokurtic gaussian.
data = model_data_cf_2,
method = "fREML",
select = TRUE)
edf(g_cf2)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 9.37
## 2 s(spei_history,L) 15.8
edf(g_cf3)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 9.37
## 2 s(spei_history,L) 15.7
draw(g_cf2, select = "s(spei_history,L)")
## Warning: Removed 968 rows containing non-finite values (stat_contour).
draw(g_cf3, select = "s(spei_history,L)")
## Warning: Removed 968 rows containing non-finite values (stat_contour).
Not substantially different
Final knots
log_size_prev: 25spei_history, L: 5, 15g_1ha <- bam(log_size ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(5, 15),
xt = list(bs = "cr")),
family = scat(link = "identity"), #like leptokurtic gaussian.
data = model_data_1ha_2,
method = "fREML",
select = TRUE)
set.seed(888)
k.check(g_1ha)
## k' edf k-index p-value
## s(log_size_prev) 9 4.126322 1.013243 0.87
## s(spei_history,L) 60 19.230773 NA NA
Default 10 knots for log_size_prev is adequate
check_res_edf(g_1ha)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
Also adequate
draw(g_1ha, select = "s(spei_history,L)")
## Warning: Removed 910 rows containing non-finite values (stat_contour).
Final knots
log_size_prev: 10spei_history, L: 5, 15rm(g_cf, g_cf2, g_cf3, g_1ha)
f_cf <- bam(flwr ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(5, 18),
xt = list(bs = "cr")),
family = binomial,
data = model_data_cf_2,
method = "fREML",
select = TRUE)
set.seed(888)
k.check(f_cf)
## k' edf k-index p-value
## s(log_size_prev) 9 5.826355 0.9637795 0.7625
## s(spei_history,L) 72 11.237704 NA NA
Knots for log_size_prev are adequate.
check_res_edf(f_cf)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 1.5
Possibly too low. Try upping number of knots in the lag dimension.
f_cf2 <- bam(flwr ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(5, 30),
xt = list(bs = "cr")),
family = binomial,
data = model_data_cf_2,
method = "fREML",
select = TRUE)
edf(f_cf)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 5.83
## 2 s(spei_history,L) 11.2
edf(f_cf2)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 5.83
## 2 s(spei_history,L) 11.1
Not substantially different. Instead, try increasing knots in SPEI dimension
f_cf3 <- bam(flwr ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(15, 18),
xt = list(bs = "cr")),
family = binomial,
data = model_data_cf_2,
method = "fREML",
select = TRUE)
edf(f_cf)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 5.83
## 2 s(spei_history,L) 11.2
edf(f_cf3)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 5.83
## 2 s(spei_history,L) 11.9
draw(f_cf, select = "s(spei_history,L)")
## Warning: Removed 968 rows containing non-finite values (stat_contour).
draw(f_cf3, select = "s(spei_history,L)")
## Warning: Removed 968 rows containing non-finite values (stat_contour).
Can I decrease knots in lag dimension?
f_cf4 <- bam(flwr ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(15, 15),
xt = list(bs = "cr")),
family = binomial,
data = model_data_cf_2,
method = "fREML",
select = TRUE)
edf(f_cf3)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 5.83
## 2 s(spei_history,L) 11.9
edf(f_cf4)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 5.82
## 2 s(spei_history,L) 12.0
About the same
check_res_edf(f_cf4)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 1.3
Final knots:
log_size_prev: 10spei_history,L: 15, 15f_1ha <- bam(flwr ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(5, 18),
xt = list(bs = "cr")),
family = binomial,
data = model_data_1ha_2,
method = "fREML",
select = TRUE)
set.seed(888)
k.check(f_1ha)
## k' edf k-index p-value
## s(log_size_prev) 9 3.413398 0.9811181 0.8125
## s(spei_history,L) 72 12.826900 NA NA
Knots for log_size_prev are adequate.
check_res_edf(f_1ha)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 16.4
Too few knots for crossbasis. Try increasing knots in lag dimension.
f_1ha2 <- bam(flwr ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(5, 25),
xt = list(bs = "cr")),
family = binomial,
data = model_data_1ha_2,
method = "fREML",
select = TRUE)
edf(f_1ha)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 3.41
## 2 s(spei_history,L) 12.8
edf(f_1ha2)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 3.41
## 2 s(spei_history,L) 12.9
No substantial change. Set back to 18 and increase knots for SPEI dimension
f_1ha3 <- bam(flwr ~
flwr_prev +
s(log_size_prev, bs = "cr") +
s(spei_history, L,
bs = "cb",
k = c(10, 18),
xt = list(bs = "cr")),
family = binomial,
data = model_data_1ha_2,
method = "fREML",
select = TRUE)
edf(f_1ha)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 3.41
## 2 s(spei_history,L) 12.8
edf(f_1ha3)
## # A tibble: 2 x 2
## smooth edf
## <chr> <dbl>
## 1 s(log_size_prev) 3.37
## 2 s(spei_history,L) 14.3
draw(f_1ha, select = "s(spei_history,L)")
## Warning: Removed 910 rows containing non-finite values (stat_contour).
draw(f_1ha3, select = "s(spei_history,L)")
## Warning: Removed 910 rows containing non-finite values (stat_contour).
(what I feared)
check_res_edf(f_1ha3)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
adequate knots.
Final knots:
log_size_prev: 10spei_history,L: 10,18rm(f_cf, f_cf2, f_cf3, f_cf4, f_1ha, f_1ha2, f_1ha3)
## datetime
Sys.time()
## [1] "2021-08-17 17:08:26 EDT"
## repository
if(requireNamespace('git2r', quietly = TRUE)) {
git2r::repository()
} else {
c(
system2("git", args = c("log", "--name-status", "-1"), stdout = TRUE),
system2("git", args = c("remote", "-v"), stdout = TRUE)
)
}
## Local: revisions /Users/scottericr/Documents/HeliconiaDemography
## Remote: revisions @ origin (https://github.com/BrunaLab/HeliconiaDemography.git)
## Head: [d40aa88] 2021-08-17: function to plot "slices" through 2d smooths
## session info
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices datasets utils
## [8] methods base
##
## other attached packages:
## [1] qqplotr_0.0.5 Hmisc_4.5-0 Formula_1.2-4 survival_3.2-11
## [5] lattice_0.20-44 readxl_1.3.1 colorspace_2.0-1 rmarkdown_2.7
## [9] statmod_1.4.36 latex2exp_0.5.0 gratia_0.6.0.9112 broom_0.7.6
## [13] patchwork_1.1.1 glue_1.4.2 bbmle_1.0.23.1 dlnm_2.4.5
## [17] mgcv_1.8-36 nlme_3.1-152 lubridate_1.7.10 janitor_2.1.0
## [21] tsModel_0.6 SPEI_1.7 lmomco_2.3.6 tsibble_1.0.1
## [25] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.5 purrr_0.3.4
## [29] readr_1.4.0 tidyr_1.1.3 tibble_3.1.1 ggplot2_3.3.5
## [33] tidyverse_1.3.1 here_1.0.1 tarchetypes_0.2.0 targets_0.4.2
## [37] dotenv_1.0.3 conflicted_1.0.4
##
## loaded via a namespace (and not attached):
## [1] ellipsis_0.3.2 rprojroot_2.0.2 htmlTable_2.1.0
## [4] snakecase_0.11.0 base64enc_0.1-3 fs_1.5.0
## [7] rstudioapi_0.13 farver_2.1.0 fansi_0.4.2
## [10] mvtnorm_1.1-1 xml2_1.3.2 codetools_0.2-18
## [13] splines_4.0.2 Lmoments_1.3-1 robustbase_0.93-7
## [16] cachem_1.0.4 knitr_1.33 jsonlite_1.7.2
## [19] anytime_0.3.9 cluster_2.1.2 dbplyr_2.1.1
## [22] png_0.1-7 compiler_4.0.2 httr_1.4.2
## [25] backports_1.2.1 assertthat_0.2.1 Matrix_1.3-3
## [28] fastmap_1.1.0 cli_2.5.0 htmltools_0.5.1.1
## [31] tools_4.0.2 igraph_1.2.6 gtable_0.3.0
## [34] Rcpp_1.0.6 jquerylib_0.1.4 cellranger_1.1.0
## [37] vctrs_0.3.8 xfun_0.22 ps_1.6.0
## [40] rvest_1.0.0 lifecycle_1.0.0 renv_0.13.2
## [43] goftest_1.2-2 DEoptimR_1.0-8 MASS_7.3-54
## [46] scales_1.1.1 hms_1.1.0 RColorBrewer_1.1-2
## [49] yaml_2.2.1 mvnfast_0.2.5.1 gridExtra_2.3
## [52] memoise_2.0.0 sass_0.3.1 rpart_4.1-15
## [55] bdsmatrix_1.3-4 latticeExtra_0.6-29 stringi_1.6.2
## [58] highr_0.9 checkmate_2.0.0 rlang_0.4.11
## [61] pkgconfig_2.0.3 evaluate_0.14 labeling_0.4.2
## [64] htmlwidgets_1.5.3 processx_3.5.2 tidyselect_1.1.1
## [67] magrittr_2.0.1 R6_2.5.0 generics_0.1.0
## [70] DBI_1.1.1 pillar_1.6.0 haven_2.4.1
## [73] foreign_0.8-81 withr_2.4.2 nnet_7.3-16
## [76] modelr_0.1.8 crayon_1.4.1 utf8_1.2.1
## [79] jpeg_0.1-8.1 isoband_0.2.4 grid_4.0.2
## [82] data.table_1.14.0 git2r_0.28.0 callr_3.7.0
## [85] reprex_2.0.0 digest_0.6.27 numDeriv_2016.8-1.1
## [88] munsell_0.5.0 bslib_0.2.4